package com.xp.ai.ragdemo;

import dev.langchain4j.data.document.Document;
import dev.langchain4j.data.document.DocumentSplitter;
import dev.langchain4j.data.segment.TextSegment;

import java.util.ArrayList;
import java.util.List;
import java.util.regex.Pattern;


/**
 * Custom {@link DocumentSplitter} for text made of numbered Q&amp;A entries.
 *
 * <p>The document text is cut in front of every line that starts with a number,
 * a dot, whitespace and the literal marker {@code **Q：} (for example
 * {@code "1. **Q：..."}). Each resulting piece becomes one {@link TextSegment};
 * the marker line stays at the start of its own piece because the delimiter is a
 * zero-width lookahead.
 */
public class MySplitter implements DocumentSplitter {

    /**
     * Regular expression marking the start of each entry.
     *
     * <p>The embedded {@code (?m)} flag is required: without MULTILINE mode
     * {@code ^} only matches at the very beginning of the input, so the text
     * would never be split at the entries that follow the first one.
     */
    final String delimiter = "(?m)(?=^\\d+\\.\\s+\\*\\*Q：)";

    /** Compiled form of {@link #delimiter}, built once instead of on every {@link #split} call. */
    private final Pattern delimiterPattern = Pattern.compile(delimiter);

    /**
     * Splits the document text into one segment per numbered {@code **Q：} entry.
     *
     * <p>Any text preceding the first entry is returned as its own segment.
     * Pieces consisting only of whitespace are skipped.
     *
     * @param document The Document to be split.
     * @return the segments in document order; empty if every piece is blank
     */
    @Override
    public List<TextSegment> split(Document document) {
        String[] parts = delimiterPattern.split(document.text());
        List<TextSegment> textSegments = new ArrayList<>(parts.length);
        for (String part : parts) {
            // Skip whitespace-only pieces (e.g. blank lines before the first entry).
            // NOTE(review): TextSegment.from is expected to reject blank text with an
            // exception - confirm against the langchain4j version in use.
            if (part.trim().isEmpty()) {
                continue;
            }
            textSegments.add(TextSegment.from(part));
        }
        return textSegments;
    }
}
